mgcViz is an extension of the mgcv R package, which extends the basic visualizations provided by mgcv using ggplot2.
library(testGam)
data("gefcom_small")
head(gefcom_small)
## Year NetDemand wM Posan Trend Dow wM_s95
## 1 2005 2.889407 16.38889 0.001370019 0.0000000000 6 8.236849
## 2 2005 2.543342 17.36111 0.004110058 0.0003961965 0 9.933947
## 3 2005 2.446159 18.88889 0.006850097 0.0007923930 1 10.794862
## 4 2005 2.190166 21.25000 0.009590136 0.0011885895 2 14.462260
## 5 2005 2.100094 20.83333 0.012330175 0.0015847861 3 16.194613
## 6 2005 2.130908 20.83333 0.015070213 0.0019809826 4 16.341327
## NetDemand.24
## 1 2.889407
## 2 2.889407
## 3 2.543342
## 4 2.446159
## 5 2.190166
## 6 2.100094
plot(gefcom_small$NetDemand)
library(mgcv)
fit1 <- gam(formula = NetDemand ~ NetDemand.24 + Dow + Trend +
s(wM) + s(wM_s95) + s(Posan, bs = "cc"),
data = gefcom_small)
Visualise the seasonal effect
plot(fit1, select = 3, scale = FALSE)
This calls plot.gam
args(plot.gam)
## function (x, residuals = FALSE, rug = NULL, se = TRUE, pages = 0,
## select = NULL, scale = -1, n = 100, n2 = 40, n3 = 3, pers = FALSE,
## theta = 30, phi = 30, jit = FALSE, xlab = NULL, ylab = NULL,
## main = NULL, ylim = NULL, xlim = NULL, too.far = 0.1, all.terms = FALSE,
## shade = FALSE, shade.col = "gray80", shift = 0, trans = I,
## seWithMean = FALSE, unconditional = FALSE, by.resids = FALSE,
## scheme = 0, ...)
## NULL
Limitations of plot.gam:
...mgcViz solves these problems using a layered system based on ggplot2. To use it, we need to object of class gamViz
library(mgcViz)
fit1_v <- getViz(fit1)
class(fit1)
## [1] "gam" "glm" "lm"
class(fit1_v)
## [1] "gamViz" "gam" "glm" "lm"
We can extract individual effects using
e1 <- sm(fit1_v, 3)
class(e1)
## [1] "cyclic.smooth" "mgcv.smooth.1D"
plot effect using effect-specific methods
pl1 <- plot(e1) # calls plot.mgcv.smooth.1D()
then add layers and render
pl1 <- pl1 + l_fitLine() + l_ciLine()
pl1 # calls print.plotSmooth
In one step
plot(sm(fit1_v, 3)) + l_fitLine() + l_ciLine()
There is a variety of layers available
pl1 <- plot(sm(fit1_v, 3)) + l_ciPoly(level = 0.99) + l_fitLine(color = "red") + l_rug()
pl2 <- plot(sm(fit1_v, 3), nsim = 20) + l_simLine()
gridPrint(pl1, pl2, ncol = 2)
To get list of layers do
listLayers( plot(sm(fit1_v, 3)) )
## [1] "l_ciLine" "l_ciPoly" "l_dens2D" "l_fitDens" "l_fitLine" "l_points"
## [7] "l_rug" "l_simLine"
You can also use ggplot2 layers
plot(sm(fit1_v, 3)) + l_fitLine() + l_ciLine() + xlim(c(0.25, 0.75))
Layers from
mgcViz always start with the l_ prefix.
For routine work you can use default plots, in mgcv
plot(fit1, select = 1, scale = FALSE) # mgcv
in mgcViz
plot(fit1_v, select = 1) # mgcViz
Plotting all terms in mgcv
plot(fit1, all.terms = TRUE, pages = 1)
in
mgcViz
print(plot(fit1_v, allTerms = TRUE), pages = 1)
So workflow is
fit1 <- gam(formula = NetDemand ~ NetDemand.24 + Dow + Trend +
s(wM) + s(wM_s95) + s(Posan, bs = "cc"),
data = gefcom_small)
fit1_v <- getViz( fit1 )
Alternatively, using shortcut:
fit1_v <- gamV(NetDemand ~ NetDemand.24 + Dow + Trend +
s(wM) + s(wM_s95) + s(Posan, bs = "cc"),
data = gefcom_small)
In general:
gamV is equivalent to getViz( gam(...) )bamV is equivalent to getViz( bam(...) )gammV is equivalent to getViz( gamm(...) )qgamV is equivalent to getViz( qgam(...) )Notice that gamV has fewer arguments
args(gam)
## function (formula, family = gaussian(), data = list(), weights = NULL,
## subset = NULL, na.action, offset = NULL, method = "GCV.Cp",
## optimizer = c("outer", "newton"), control = list(), scale = 0,
## select = FALSE, knots = NULL, sp = NULL, min.sp = NULL, H = NULL,
## gamma = 1, fit = TRUE, paraPen = NULL, G = NULL, in.out = NULL,
## drop.unused.levels = TRUE, drop.intercept = NULL, ...)
## NULL
args(gamV)
## function (formula, family = gaussian(), data = list(), method = "REML",
## aGam = list(), aViz = list())
## NULL
So to do
b <- gam(formula = f, data = d,
select = TRUE, gamma = 1.5)
b <- getViz(b,
nsim = 50, post = TRUE)
we need to do
b <- gamV(formula = f, data = d,
aGam = list(select = TRUE, gamma = 1.5),
aViz = list(nsim = 50, post = TRUE))
So, going back to our example
fit1_v <- gamV(NetDemand ~ NetDemand.24 + Dow + Trend +
s(wM) + s(wM_s95) + s(Posan, bs = "cc"),
data = gefcom_small,
aViz = list(nsim = 50))
We check the residuals as a function of time (trend):
plot(gefcom_small$Trend, residuals(fit1_v), ylim = c(-1, 1))
Using mgcViz
check1D(fit1_v, x = "Trend") + l_gridCheck1D()
We need a smooth effect for trend. Same checks for several effects
pl <- check1D(fit1_v, x = list("Trend", "wM", "Posan", "Dow")) + l_gridCheck1D()
print(pl, pages = 1)
Looking at basis dimension:
par(mfrow = c(2, 2))
gam.check(fit1_v)
##
## Method: REML Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [-0.0002371614,0.0002097874]
## (score -397.8729 & scale 0.04002065).
## Hessian positive definite, eigenvalue range [3.292137,1255.534].
## Model rank = 35 / 35
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(wM) 9.00 8.60 0.91 <2e-16 ***
## s(wM_s95) 9.00 8.13 1.02 0.78
## s(Posan) 8.00 7.66 1.04 0.98
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Same with mgcViz
check(fit1_v) # calls check.gamViz
##
## Method: REML Optimizer: outer newton
## full convergence after 7 iterations.
## Gradient range [-0.0002371614,0.0002097874]
## (score -397.8729 & scale 0.04002065).
## Hessian positive definite, eigenvalue range [3.292137,1255.534].
## Model rank = 35 / 35
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(wM) 9.00 8.60 0.91 <2e-16 ***
## s(wM_s95) 9.00 8.13 1.02 0.76
## s(Posan) 8.00 7.66 1.04 0.97
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
edf is quite close to k' for all effects, and we get a very low p-value for wM. We have to increase k a bit.
fit2_v <- gamV(NetDemand ~ NetDemand.24 + Dow + s(Trend, k = 6) + s(wM, k = 20) +
s(wM_s95, k = 15) + s(Posan, k = 15, bs = "cc"),
data = gefcom_small,
aViz = list(nsim = 50))
Re-check residuals
pl <- check1D(fit2_v, x = list("Trend", "NetDemand.24", "wM", "Posan")) + l_gridCheck1D()
print(pl, pages = 1)
Look at QQ-plots:
qq(fit2_v, rep = 100, CI = "quantile") # calls qq.gamViz
We change to a Student-t distribution
fit3_v <- gamV(NetDemand ~ NetDemand.24 + Dow + s(Trend, k = 6) + s(wM, k = 20) +
s(wM_s95, k = 15) + s(Posan, k = 15, bs = "cc"), data = gefcom_small,
family = scat,
aViz = list(nsim = 50))
AIC(fit2_v, fit3_v)
## df AIC
## fit2_v 49.98509 -1187.255
## fit3_v 52.62095 -1469.572
qq(fit3_v, rep = 100, CI = "quantile")
Load the data
library(testGam)
library(mgcv)
data("Larynx")
head(Larynx)
## region E Y x
## 1 0 7.965008 8 56
## 2 1 22.836219 22 65
## 3 2 22.094716 19 50
## 4 3 7.919352 3 63
## 5 4 13.139889 14 65
## 6 5 15.898848 9 51
Load geometry of regions
data("german.polys")
str(german.polys[1:5])
## List of 5
## $ 0: num [1:10, 1:2] 2535 2554 2577 2577 2592 ...
## $ 1: num [1:20, 1:2] 2988 2955 2934 2967 2924 ...
## $ 2: num [1:30, 1:2] 3490 3498 3537 3498 3476 ...
## $ 3: num [1:10, 1:2] 2927 2871 2871 2876 2859 ...
## $ 4: num [1:46, 1:2] 2363 2377 2426 2440 2516 ...
Calculate regions’ centres and add them to data set
X <- t( sapply(german.polys, colMeans, na.rm=TRUE) )
Larynx$lo <- X[ , 1]
Larynx$la <- X[ , 2]
Now fit model including only the effect of smoking
fit1 <- gamV(Y ~ s(x, k=20),
family = poisson,
data=Larynx,
aViz = list(nsim = 100))
Check mean residuals, binned across space
check2D(fit1, x1 = Larynx$lo, x2 = Larynx$la) + l_gridCheck2D()
The massive positive outlier is Berlin! We forgot the offset (i.e. number of cancers proportional to population of region)
fit2 <- gamV(Y ~ s(x, k=20) + offset( log(E) ),
family = poisson,
data=Larynx,
aViz = list(nsim = 100))
Repeat the check
check2D(fit2, x1 = Larynx$lo, x2 = Larynx$la) + l_gridCheck2D()
There is still some spatial effect.
Add MRF effect to capture spatial effect
fit3 <- gamV(Y ~ s(region, k = 200, bs="mrf", xt=list(polys=german.polys)) +
s(x, k=20) + offset( log(E) ),
family = poisson,
data=Larynx,
aViz = list(nsim = 100))
Repeat the checks and compare
library(viridis)
ck1 <- check2D(fit2, x1 = Larynx$lo, x2 = Larynx$la) + l_gridCheck2D() + ggtitle("Before") +
scale_fill_gradientn(colours = viridis(50, begin = 0.2), limits = c(-5, 5))
ck2 <- check2D(fit3, x1 = Larynx$lo, x2 = Larynx$la) + l_gridCheck2D() + ggtitle("After") +
scale_fill_gradientn(colours = viridis(50, begin = 0.2), limits = c(-5, 5))
gridPrint(ck1, ck2, ncol = 2)
Looks better! We look at the spatial effect
plot(fit3, select = 1)
Now we use an isotropic smooth instead
fitISO <- gamV(Y ~ s(lo, la, k = 200) +
s(x, k=20) + offset( log(E) ),
family = poisson,
data=Larynx)
Visualise the effect
plot(fitISO, select = 1)
Visualise in 3D using rgl package
plotRGL( sm(fitISO, 1), residuals = TRUE )
NB: plotRGL is not layered, it is a classic function
args(plotRGL.mgcv.smooth.2D)
## function (x, se = TRUE, n = 40, residuals = FALSE, type = "auto",
## maxpo = 1000, too.far = 0, xlab = NULL, ylab = NULL, main = NULL,
## xlim = NULL, ylim = NULL, se.mult = 1, trans = identity,
## seWithMean = FALSE, unconditional = FALSE, ...)
## NULL